import numpy as np
import torch

from attacks import Attack
import torch.nn.functional as F
from scipy import stats as st

from constants import DEVICE
from utils import cross_entropy_loss, de_normalization, normalization


class EPNTIM(Attack):
    """ EPNTIM: EPN + TIM

    Iterative attack whose input gradients are smoothed with a fixed per-channel
    Gaussian kernel before being accumulated into a momentum-like term ``g``.
    ``g`` is first warmed up over ``epochs`` passes that feed the model randomly
    channel-permuted inputs, then reused by a final pass on the unpermuted input.
    """

    def __init__(self, model, eps=16 / 255, steps=10, decay=1.0, epochs=5, kernel_name='gaussian', len_kernel=15,
                 nsig=3, num_classes=1000):
        """
        :param model: DNN model
        :param eps: the maximum perturbation (applied in de-normalized space, which is clamped to [0, 1])
        :param steps: the number of iterations of each warm-up epoch and of the final pass
        :param decay: the decay factor applied to the accumulated gradient
        :param epochs: the number of warm-up passes run on channel-permuted inputs
        :param kernel_name: the smoothing kernel type; only 'gaussian' is implemented
        :param len_kernel: the side length of the square smoothing kernel
        :param nsig: the kernel samples the standard normal pdf on [-nsig, nsig]
        :param num_classes: the width of the one-hot targets handed to the loss
        """
        super().__init__("EPNTIM", model)
        self.eps = eps
        self.steps = steps
        self.alpha = self.eps / self.steps
        self.decay = decay
        self.epochs = epochs
        self.kernel_name = kernel_name
        self.len_kernel = len_kernel
        self.nsig = nsig
        self.num_classes = num_classes
        self.stacked_kernel = torch.from_numpy(self.kernel_generation()).to(DEVICE)

    def kernel_generation(self):
        """Return the float32 smoothing kernel as an array of shape (3, 1, len_kernel, len_kernel).

        :raises NotImplementedError: if ``kernel_name`` is not 'gaussian'
        """
        if self.kernel_name != 'gaussian':
            raise NotImplementedError(f"Unsupported kernel_name: {self.kernel_name!r}")

        kernel = self.gkern().astype(np.float32)
        # One copy per channel; the inserted axis gives the weight layout conv2d needs with groups=3.
        stack_kernel = np.stack([kernel, kernel, kernel])
        return np.expand_dims(stack_kernel, 1)

    def gkern(self):
        """Returns a 2D Gaussian kernel array."""
        x = np.linspace(-self.nsig, self.nsig, self.len_kernel)
        kern1d = st.norm.pdf(x)
        kernel_raw = np.outer(kern1d, kern1d)
        # Normalize so the kernel entries sum to 1.
        return kernel_raw / kernel_raw.sum()

    @staticmethod
    def _mean_abs(grad):
        """Return the per-sample mean absolute value of ``grad``, floored to avoid division by zero."""
        scale = torch.mean(torch.abs(grad), dim=(1, 2, 3), keepdim=True)
        # An all-zero gradient would otherwise produce 0/0 = NaN and poison every later update.
        return torch.clamp(scale, min=torch.finfo(grad.dtype).tiny)

    def _smoothed_grad(self, point, targets, shuffle_channels=False):
        """Return the kernel-smoothed gradient of the loss with respect to ``point``."""
        # Differentiate w.r.t. a fresh leaf so this works whether or not the caller enabled
        # requires_grad, and so no autograd graph is carried over from previous steps.
        point = point.detach().requires_grad_(True)
        if shuffle_channels:
            # Independent random channel permutation for every sample in the batch.
            model_input = torch.stack([x[torch.randperm(3), :, :] for x in point])
        else:
            model_input = point
        loss = cross_entropy_loss(self.model(model_input), targets)
        grad = torch.autograd.grad(loss, point)[0]
        return F.conv2d(grad, self.stacked_kernel, stride=1, padding='same', groups=3)

    def _step(self, adv, g, targets, images_min, images_max, shuffle_channels):
        """Run one update and return the new adversarial images and the new accumulated gradient."""
        grad = self._smoothed_grad(adv, targets, shuffle_channels)
        grad_scale = self._mean_abs(grad)

        base = de_normalization(adv.detach())
        # Look-ahead point: move along the current normalized gradient plus the accumulated direction.
        lookahead = normalization(base + self.alpha * grad / grad_scale + self.decay * self.alpha * g)
        # The look-ahead gradient is always taken on the unpermuted input.
        lookahead_grad = self._smoothed_grad(lookahead, targets)

        g = self.decay * g + grad / grad_scale + lookahead_grad / self._mean_abs(lookahead_grad)

        # Sign step from the ORIGINAL point, kept inside the eps-ball and the valid [0, 1] range.
        stepped = torch.clamp(base + self.alpha * torch.sign(g), min=images_min, max=images_max)
        return normalization(stepped).detach(), g

    def forward(self, images, labels):
        """
        Craft adversarial examples for ``images``.

        :param images: batch of 3-channel images of shape (N, 3, H, W); presumably already
            normalized, since they are passed through ``de_normalization`` -- TODO confirm
        :param labels: class indices, one-hot encoded here with ``num_classes`` entries
        :return: adversarial images in the same space as ``images``, detached from autograd
        """
        targets = F.one_hot(labels.type(torch.int64), self.num_classes).float().to(DEVICE)
        # Bounds are computed on a detached copy so they never drag an autograd graph along.
        images_de_normalized = de_normalization(images.detach())
        images_min = torch.clamp(images_de_normalized - self.eps, min=0.0, max=1.0)
        images_max = torch.clamp(images_de_normalized + self.eps, min=0.0, max=1.0)

        g = torch.zeros_like(images)
        # Warm-up: every epoch restarts from the clean images but keeps accumulating into g.
        for _ in range(self.epochs):
            adv_hat = images.clone().detach()
            for _ in range(self.steps):
                adv_hat, g = self._step(adv_hat, g, targets, images_min, images_max, shuffle_channels=True)

        # Final pass: same update without channel permutation, starting from the warmed-up g.
        adv = images.clone().detach()
        for _ in range(self.steps):
            adv, g = self._step(adv, g, targets, images_min, images_max, shuffle_channels=False)

        return adv
